##########################################################################
# Description: Replicates figures                                        #
# Author: Ozlem Tuncel                                                   #
# Title: Lecturer and Data Services Specialist                           #
# Affiliation: Georgia State University                                  #
# Department: Research Data Services & Political Science                 #
# Email: otuncelgurlek1@gsu.edu                                          #
# Date: 08/04/2025                                                       #
# R Version: R version 4.4.0 (2024-04-24 ucrt) -- "Puppy Cup"            #
# Computer: Dell Latitude 7450 x64-based PC                              #
# Processor: Intel(R) Core(TM) Ultra 7 165U, 2.10 GHz, 12 Cores          #
# BIOS version/date: Dell Inc. 1.3.0, 4/11/2024                          #
# OS: 22621.3880                                                         #
##########################################################################

# Clean slate
# Remove every object that ls() lists in the current environment so the script
# does not pick up leftovers from an earlier session. This does not detach
# packages or reset options.
rm(list = ls())
# Fix the random-number seed.
# NOTE(review): no visible step in this script draws random numbers, so this
# looks precautionary -- confirm before removing.
set.seed(1234)
# Report the current working directory; the relative paths used in this script
# ("replication_data/..." and "figures/...") are resolved against it.
getwd()
# setwd() # use this to set the working directory

# Load library ----
library(tidyverse)  # version 2.0.0

# Import data ----
# Read the elite interview replication dataset; the path is relative to the
# current working directory.
my_data <- readr::read_csv(
  file = "replication_data/elite_interview_data.csv"
)

# Figure 1 ----
# Distribution of articles using elite interviews over the years

# Total number of articles per year. Years between 2000 and 2023 without any
# article are added with a count of zero so the line does not skip them.
total_data <- my_data |>
  count(year, name = "count") |>
  complete(year = seq(2000, 2023), fill = list(count = 0)) |>
  mutate(type_of_evidence = "All articles")

# Number of articles per year and type of evidence. Every year/type
# combination is present, with zero where no article was counted. The raw
# codes are then replaced by the labels shown in the legend.
evidence_data <- my_data |>
  count(year, type_of_evidence, name = "count") |>
  complete(
    year = seq(2000, 2023),
    type_of_evidence,
    fill = list(count = 0)
  ) |>
  mutate(
    type_of_evidence = recode(
      type_of_evidence,
      "mixed" = "Mixed-methods",
      "qualitative" = "Qualitative Methods"
    )
  )

# Combine total and evidence data into one long table (one row per year and
# series) so all three series can be drawn from a single data frame
combined_data <- bind_rows(total_data, evidence_data)

# Plot
# Color, linetype and shape are all mapped to the same variable and share the
# same (empty) legend title, so ggplot2 merges them into a single legend.
# All series are black; they are told apart by linetype and point shape.
year_fig <- ggplot(
  combined_data,
  aes(
    x = year,
    y = count,
    group = type_of_evidence,
    color = type_of_evidence,
    linetype = type_of_evidence,
    shape = type_of_evidence
  )
) +
  # `linewidth` (not `size`) sets line thickness since ggplot2 3.4.0
  geom_line(linewidth = 0.5) +
  geom_point(size = 2) +
  scale_x_continuous(
    breaks = seq(2000, 2023, by = 1),
    limits = c(2000, 2023)
  ) +
  # NOTE(review): counts above 18 would fall outside the axis limits and be
  # dropped from the plot -- raise the limit if the data change.
  scale_y_continuous(breaks = seq(0, 18, by = 2), limits = c(0, 18)) +
  scale_color_manual(
    values = c(
      "All articles" = "black",
      "Mixed-methods" = "black",
      "Qualitative Methods" = "black"
    )
  ) +
  scale_linetype_manual(
    values = c(
      "All articles" = "solid",
      "Mixed-methods" = "dashed",
      "Qualitative Methods" = "dotted"
    )
  ) +
  scale_shape_manual(
    values = c(
      "All articles" = 16,
      "Mixed-methods" = 17,
      "Qualitative Methods" = 18
    )
  ) +
  labs(x = "Year", y = "Frequency", color = "", linetype = "", shape = "") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

year_fig

# Make sure the output folder exists before saving; depending on the ggplot2
# version, ggsave() fails or prompts when the folder is missing.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

ggsave(
  filename = "figures/Figure1.pdf",
  plot = year_fig,
  dpi = 300,
  width = 8,
  height = 4
)

# Figure 2 ----
# Regional distribution of elite interviewing
# Horizontal bar chart of the percentage of articles per region. Rows with a
# missing region are excluded, so the percentages are shares of the rows that
# have a region recorded.
region_fig <- my_data |>
  filter(!is.na(region)) |>
  count(region) |>
  mutate(perc = n / sum(n) * 100) |>
  # reorder() sorts the bars by their percentage
  ggplot(aes(x = reorder(region, perc), y = perc)) +
  geom_col() +
  coord_flip() +
  labs(x = "Regions", y = "Percentage") +
  # NOTE(review): a region with a share above 22% would fall outside the axis
  # limits and its bar would be dropped -- raise the limit if the data change.
  scale_y_continuous(breaks = seq(0, 22, by = 2), limits = c(0, 22)) +
  theme_minimal()

region_fig

# Make sure the output folder exists before saving; depending on the ggplot2
# version, ggsave() fails or prompts when the folder is missing.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

ggsave(
  filename = "figures/Figure2.pdf",
  plot = region_fig,
  dpi = 300,
  width = 6,
  height = 4
)

# Figure 3 ----
# Appendix use, anonymity decisions, and reporting IRB information over the
# years. Three yearly counts are computed and drawn as separate black lines
# that differ only by linetype.
appendix_figure <- my_data |>
  select(year, appendix_interviews, IRB, anonym_explanation) |>
  group_by(year) |>
  summarise(
    # Rows with a non-missing entry in appendix_interviews
    count_appendix_interviews = sum(!is.na(appendix_interviews)),
    # Rows whose anonym_explanation is anything other than "No".
    # na.rm = TRUE matters: without it a single missing value turns the
    # year's sum into NA, which complete(fill = ...) below would then
    # silently replace with 0.
    count_anonymity = sum(anonym_explanation != "No", na.rm = TRUE),
    # NOTE(review): this counts every non-missing IRB value, not only "Yes".
    # It matches the "IRB Approved" label only if IRB is left empty whenever
    # approval is not reported -- confirm against the codebook.
    count_IRB = sum(!is.na(IRB))
  ) |>
  # Add years between 2000 and 2023 that have no rows, with zero counts
  complete(
    year = seq(2000, 2023),
    fill = list(
      count_appendix_interviews = 0,
      count_IRB = 0,
      count_anonymity = 0
    )
  ) |>
  ggplot(aes(x = year)) +
  # `linewidth` (not `size`) sets line thickness since ggplot2 3.4.0
  geom_line(
    aes(y = count_appendix_interviews, linetype = "Appendix Exists"),
    linewidth = 0.5
  ) +
  geom_point(aes(y = count_appendix_interviews)) +
  geom_line(
    aes(y = count_IRB, linetype = "IRB Approved"),
    linewidth = 0.5
  ) +
  geom_point(aes(y = count_IRB)) +
  geom_line(
    aes(y = count_anonymity, linetype = "Anonymity Explained"),
    linewidth = 0.5
  ) +
  geom_point(aes(y = count_anonymity)) +
  labs(x = "Year", y = "Frequency", linetype = "") +
  scale_x_continuous(
    breaks = seq(2000, 2023, by = 1),
    limits = c(2000, 2023)
  ) +
  scale_y_continuous(breaks = seq(0, 20, by = 2)) +
  # Named values make the label-to-linetype assignment explicit instead of
  # relying on the alphabetical order of the labels
  scale_linetype_manual(
    values = c(
      "Anonymity Explained" = "solid",
      "Appendix Exists" = "dashed",
      "IRB Approved" = "dotted"
    )
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

appendix_figure

# Make sure the output folder exists before saving; depending on the ggplot2
# version, ggsave() fails or prompts when the folder is missing.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

ggsave(
  filename = "figures/Figure3.pdf",
  plot = appendix_figure,
  dpi = 300,
  width = 8,
  height = 4
)